Importing Libraries
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ lubridate::intersect() masks base::intersect()
## ✖ dplyr::lag() masks stats::lag()
## ✖ lubridate::setdiff() masks base::setdiff()
## ✖ lubridate::union() masks base::union()
library(dplyr)
library(skimr)
library(stringr)
library(treemap)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(viridis)
## Loading required package: viridisLite
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Load every CSV input used by the project from the course folder.

# Match-level tables, one per league; they are stacked into `matches` below.
epl <- read.csv(file = "~/STA-418-518/epl1.csv")
laliga <- read.csv(file = "~/STA-418-518/laliga1.csv")

# Reference tables used to translate league, country and team IDs into names.
league <- read.csv(file = "~/STA-418-518/league.csv")
country <- read.csv(file = "~/STA-418-518/country.csv")
teamNames <- read.csv(file = "~/STA-418-518/teamnames.csv")

# Remaining inputs. NOTE(review): presumably player, manager and referee
# tables, judging by the file names only -- confirm where they are used.
players <- read.csv(file = "~/STA-418-518/players.csv")
eplman <- read.csv(file = "~/STA-418-518/EPL MAN.csv")
laligaman <- read.csv(file = "~/STA-418-518/LaLiga Managers.csv")
eplref <- read.csv(file = "~/STA-418-518/EplRef.csv")
laligaref <- read.csv(file = "~/STA-418-518/Laliga Ref.csv")
# Stack the EPL and La Liga match tables into a single data frame, `matches`.
matches <- rbind(epl, laliga)

# Add readable "Country", "League", "HomeTeam" and "AwayTeam" columns by
# looking each ID up in its reference table.
#
# `match()` returns, for every row of `matches`, the position of the first
# reference row carrying the same ID (NA when the ID is absent), so each new
# column is filled in one vectorised step. This replaces four row-by-row loops
# that (a) iterated over `1:nrow(matches)`, which counts 1, 0 on an empty data
# frame, (b) coerced the whole reference table to a matrix on every iteration,
# and (c) returned the wrong cell whenever an ID occurred more than once in a
# reference table.
#
# The value columns are still addressed by position, exactly as before:
# column 2 of `country`, column 3 of `league` and column 4 of `teamNames`.
# `as.character()` keeps the new columns character-typed, as the matrix-based
# lookup did.
matches$Country <- as.character(
  country[[2]][match(matches$country_id, country[["id"]])]
)
matches$League <- as.character(
  league[[3]][match(matches$league_id, league[["id"]])]
)
matches$HomeTeam <- as.character(
  teamNames[[4]][match(matches$home_team_api_id, teamNames[["team_api_id"]])]
)
matches$AwayTeam <- as.character(
  teamNames[[4]][match(matches$away_team_api_id, teamNames[["team_api_id"]])]
)
# Profile every column of `matches` with skimr's skim(). The report lists the
# number of missing values per column, which is what we are checking for here.
skim(matches)
Data summary
| Name |
matches |
| Number of rows |
6080 |
| Number of columns |
136 |
| _______________________ |
|
| Column type frequency: |
|
| character |
14 |
| numeric |
122 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| season |
0 |
1 |
9 |
9 |
0 |
8 |
0 |
| date |
0 |
1 |
8 |
10 |
0 |
1095 |
0 |
| goal |
0 |
1 |
8 |
4694 |
0 |
5665 |
0 |
| shoton |
0 |
1 |
10 |
8399 |
0 |
4675 |
0 |
| shotoff |
0 |
1 |
11 |
8002 |
0 |
4674 |
0 |
| foulcommit |
0 |
1 |
14 |
18077 |
0 |
4675 |
0 |
| card |
0 |
1 |
8 |
4849 |
0 |
5883 |
0 |
| cross |
0 |
1 |
9 |
22748 |
0 |
4675 |
0 |
| corner |
0 |
1 |
10 |
7838 |
0 |
4674 |
0 |
| possession |
0 |
1 |
14 |
6520 |
0 |
4664 |
0 |
| Country |
0 |
1 |
5 |
7 |
0 |
2 |
0 |
| League |
0 |
1 |
15 |
22 |
0 |
2 |
0 |
| HomeTeam |
0 |
1 |
6 |
25 |
0 |
67 |
0 |
| AwayTeam |
0 |
1 |
6 |
25 |
0 |
67 |
0 |
Variable type: numeric
| country_id |
0 |
1.00 |
11623.50 |
9895.31 |
1729.00 |
1729.00 |
11623.50 |
21518.00 |
21518.00 |
▇▁▁▁▇ |
| league_id |
0 |
1.00 |
11623.50 |
9895.31 |
1729.00 |
1729.00 |
11623.50 |
21518.00 |
21518.00 |
▇▁▁▁▇ |
| stage |
0 |
1.00 |
19.50 |
10.97 |
1.00 |
10.00 |
19.50 |
29.00 |
38.00 |
▇▇▇▇▇ |
| match_api_id |
0 |
1.00 |
1196727.34 |
493303.98 |
489042.00 |
801149.75 |
1140128.50 |
1560638.75 |
2030537.00 |
▇▇▃▃▇ |
| home_team_api_id |
0 |
1.00 |
9087.28 |
768.54 |
7869.00 |
8472.00 |
8654.00 |
9869.00 |
10281.00 |
▂▇▁▁▆ |
| away_team_api_id |
0 |
1.00 |
9087.28 |
768.54 |
7869.00 |
8472.00 |
8654.00 |
9869.00 |
10281.00 |
▂▇▁▁▆ |
| home_team_goal |
0 |
1.00 |
1.59 |
1.35 |
0.00 |
1.00 |
1.00 |
2.00 |
10.00 |
▇▂▁▁▁ |
| away_team_goal |
0 |
1.00 |
1.15 |
1.15 |
0.00 |
0.00 |
1.00 |
2.00 |
8.00 |
▇▃▁▁▁ |
| home_player_X1 |
0 |
1.00 |
1.00 |
0.00 |
1.00 |
1.00 |
1.00 |
1.00 |
1.00 |
▁▁▇▁▁ |
| home_player_X2 |
0 |
1.00 |
2.02 |
0.26 |
1.00 |
2.00 |
2.00 |
2.00 |
8.00 |
▇▁▁▁▁ |
| home_player_X3 |
0 |
1.00 |
4.02 |
0.26 |
1.00 |
4.00 |
4.00 |
4.00 |
8.00 |
▁▁▇▁▁ |
| home_player_X4 |
0 |
1.00 |
6.01 |
0.31 |
2.00 |
6.00 |
6.00 |
6.00 |
8.00 |
▁▁▁▇▁ |
| home_player_X5 |
0 |
1.00 |
7.85 |
0.91 |
1.00 |
8.00 |
8.00 |
8.00 |
9.00 |
▁▁▁▁▇ |
| home_player_X6 |
0 |
1.00 |
3.19 |
1.18 |
1.00 |
2.00 |
4.00 |
4.00 |
9.00 |
▅▇▁▁▁ |
| home_player_X7 |
0 |
1.00 |
4.94 |
1.09 |
2.00 |
4.00 |
5.00 |
6.00 |
8.00 |
▁▆▂▇▁ |
| home_player_X8 |
0 |
1.00 |
4.73 |
1.64 |
2.00 |
3.00 |
5.00 |
6.00 |
9.00 |
▇▁▇▂▁ |
| home_player_X9 |
0 |
1.00 |
5.93 |
1.75 |
1.00 |
5.00 |
5.00 |
8.00 |
9.00 |
▁▂▇▁▆ |
| home_player_X10 |
0 |
1.00 |
5.75 |
1.49 |
3.00 |
4.00 |
5.00 |
7.00 |
9.00 |
▆▃▁▇▁ |
| home_player_X11 |
0 |
1.00 |
5.54 |
0.70 |
1.00 |
5.00 |
5.00 |
6.00 |
7.00 |
▁▁▁▇▆ |
| away_player_X1 |
0 |
1.00 |
1.00 |
0.01 |
1.00 |
1.00 |
1.00 |
1.00 |
2.00 |
▇▁▁▁▁ |
| away_player_X2 |
0 |
1.00 |
2.03 |
0.28 |
1.00 |
2.00 |
2.00 |
2.00 |
8.00 |
▇▁▁▁▁ |
| away_player_X3 |
0 |
1.00 |
4.03 |
0.31 |
2.00 |
4.00 |
4.00 |
4.00 |
8.00 |
▁▇▁▁▁ |
| away_player_X4 |
0 |
1.00 |
6.01 |
0.36 |
1.00 |
6.00 |
6.00 |
6.00 |
8.00 |
▁▁▁▇▁ |
| away_player_X5 |
0 |
1.00 |
7.81 |
1.03 |
1.00 |
8.00 |
8.00 |
8.00 |
9.00 |
▁▁▁▁▇ |
| away_player_X6 |
0 |
1.00 |
3.21 |
1.25 |
1.00 |
2.00 |
4.00 |
4.00 |
9.00 |
▅▇▁▁▁ |
| away_player_X7 |
0 |
1.00 |
4.91 |
1.15 |
2.00 |
4.00 |
5.00 |
6.00 |
8.00 |
▂▆▂▇▁ |
| away_player_X8 |
0 |
1.00 |
4.73 |
1.64 |
2.00 |
3.00 |
5.00 |
6.00 |
9.00 |
▇▁▇▂▁ |
| away_player_X9 |
0 |
1.00 |
5.85 |
1.73 |
1.00 |
5.00 |
5.00 |
8.00 |
9.00 |
▁▂▇▂▆ |
| away_player_X10 |
0 |
1.00 |
5.91 |
1.55 |
1.00 |
4.00 |
7.00 |
7.00 |
9.00 |
▁▅▃▇▂ |
| away_player_X11 |
1 |
1.00 |
5.52 |
0.71 |
3.00 |
5.00 |
5.00 |
6.00 |
7.00 |
▁▁▇▃▂ |
| home_player_Y1 |
0 |
1.00 |
1.00 |
0.00 |
1.00 |
1.00 |
1.00 |
1.00 |
1.00 |
▁▁▇▁▁ |
| home_player_Y2 |
0 |
1.00 |
3.00 |
0.00 |
3.00 |
3.00 |
3.00 |
3.00 |
3.00 |
▁▁▇▁▁ |
| home_player_Y3 |
0 |
1.00 |
3.00 |
0.00 |
3.00 |
3.00 |
3.00 |
3.00 |
3.00 |
▁▁▇▁▁ |
| home_player_Y4 |
0 |
1.00 |
3.00 |
0.00 |
3.00 |
3.00 |
3.00 |
3.00 |
3.00 |
▁▁▇▁▁ |
| home_player_Y5 |
0 |
1.00 |
3.07 |
0.51 |
3.00 |
3.00 |
3.00 |
3.00 |
7.00 |
▇▁▁▁▁ |
| home_player_Y6 |
0 |
1.00 |
6.45 |
0.64 |
3.00 |
6.00 |
6.00 |
7.00 |
8.00 |
▁▁▇▇▁ |
| home_player_Y7 |
0 |
1.00 |
6.56 |
0.55 |
5.00 |
6.00 |
7.00 |
7.00 |
8.00 |
▁▇▁▇▁ |
| home_player_Y8 |
0 |
1.00 |
7.44 |
0.55 |
3.00 |
7.00 |
7.00 |
8.00 |
9.00 |
▁▁▁▇▇ |
| home_player_Y9 |
0 |
1.00 |
7.84 |
0.93 |
6.00 |
7.00 |
8.00 |
8.00 |
10.00 |
▁▇▇▁▂ |
| home_player_Y10 |
0 |
1.00 |
8.89 |
1.05 |
6.00 |
8.00 |
9.00 |
10.00 |
11.00 |
▁▇▁▇▁ |
| home_player_Y11 |
0 |
1.00 |
10.58 |
0.51 |
1.00 |
10.00 |
11.00 |
11.00 |
11.00 |
▁▁▁▁▇ |
| away_player_Y1 |
0 |
1.00 |
1.00 |
0.03 |
1.00 |
1.00 |
1.00 |
1.00 |
3.00 |
▇▁▁▁▁ |
| away_player_Y2 |
0 |
1.00 |
3.00 |
0.00 |
3.00 |
3.00 |
3.00 |
3.00 |
3.00 |
▁▁▇▁▁ |
| away_player_Y3 |
0 |
1.00 |
3.00 |
0.00 |
3.00 |
3.00 |
3.00 |
3.00 |
3.00 |
▁▁▇▁▁ |
| away_player_Y4 |
0 |
1.00 |
3.00 |
0.00 |
3.00 |
3.00 |
3.00 |
3.00 |
3.00 |
▁▁▇▁▁ |
| away_player_Y5 |
0 |
1.00 |
3.08 |
0.57 |
3.00 |
3.00 |
3.00 |
3.00 |
7.00 |
▇▁▁▁▁ |
| away_player_Y6 |
0 |
1.00 |
6.44 |
0.67 |
3.00 |
6.00 |
6.00 |
7.00 |
8.00 |
▁▁▇▇▁ |
| away_player_Y7 |
0 |
1.00 |
6.58 |
0.56 |
3.00 |
6.00 |
7.00 |
7.00 |
8.00 |
▁▁▆▇▁ |
| away_player_Y8 |
0 |
1.00 |
7.44 |
0.55 |
5.00 |
7.00 |
7.00 |
8.00 |
9.00 |
▁▁▇▇▁ |
| away_player_Y9 |
0 |
1.00 |
7.87 |
0.96 |
6.00 |
7.00 |
8.00 |
8.00 |
10.00 |
▁▇▇▁▂ |
| away_player_Y10 |
0 |
1.00 |
8.79 |
1.08 |
6.00 |
8.00 |
8.00 |
10.00 |
11.00 |
▂▇▁▇▁ |
| away_player_Y11 |
1 |
1.00 |
10.61 |
0.49 |
8.00 |
10.00 |
11.00 |
11.00 |
11.00 |
▁▁▁▅▇ |
| home_player_1 |
0 |
1.00 |
63473.61 |
69077.06 |
2984.00 |
30660.00 |
34382.00 |
69650.00 |
532942.00 |
▇▁▁▁▁ |
| home_player_2 |
0 |
1.00 |
91024.26 |
94497.68 |
2802.00 |
31303.00 |
37754.00 |
150466.00 |
690308.00 |
▇▂▁▁▁ |
| home_player_3 |
0 |
1.00 |
75413.36 |
90512.62 |
2752.00 |
26209.00 |
34193.00 |
77741.00 |
643570.00 |
▇▁▁▁▁ |
| home_player_4 |
0 |
1.00 |
76322.77 |
85980.81 |
2752.00 |
27668.00 |
37440.00 |
93458.00 |
580589.00 |
▇▂▁▁▁ |
| home_player_5 |
0 |
1.00 |
88639.59 |
96614.40 |
2752.00 |
31921.00 |
40006.00 |
111930.00 |
693138.00 |
▇▂▁▁▁ |
| home_player_6 |
0 |
1.00 |
81478.15 |
88240.96 |
2802.00 |
30893.00 |
38469.00 |
109621.00 |
722766.00 |
▇▂▁▁▁ |
| home_player_7 |
0 |
1.00 |
82098.65 |
89122.79 |
2802.00 |
30598.00 |
38609.00 |
109621.00 |
683450.00 |
▇▂▁▁▁ |
| home_player_8 |
0 |
1.00 |
87449.87 |
98323.68 |
2802.00 |
30876.00 |
38807.00 |
109058.00 |
683450.00 |
▇▂▁▁▁ |
| home_player_9 |
0 |
1.00 |
87782.95 |
99774.21 |
2770.00 |
30892.00 |
38433.00 |
114030.50 |
722766.00 |
▇▂▁▁▁ |
| home_player_10 |
0 |
1.00 |
86419.61 |
100797.74 |
2802.00 |
30840.00 |
38460.00 |
109491.00 |
742405.00 |
▇▂▁▁▁ |
| home_player_11 |
0 |
1.00 |
75563.14 |
88766.28 |
2802.00 |
30830.00 |
38044.00 |
75445.00 |
696365.00 |
▇▂▁▁▁ |
| away_player_1 |
0 |
1.00 |
63648.58 |
69294.88 |
2796.00 |
30657.00 |
33986.00 |
69650.00 |
532942.00 |
▇▁▁▁▁ |
| away_player_2 |
0 |
1.00 |
91852.77 |
95324.49 |
2790.00 |
31306.00 |
38067.00 |
150480.00 |
706985.00 |
▇▂▁▁▁ |
| away_player_3 |
0 |
1.00 |
73553.91 |
86346.81 |
2752.00 |
26552.00 |
34193.00 |
75395.00 |
643570.00 |
▇▁▁▁▁ |
| away_player_4 |
0 |
1.00 |
78355.81 |
90777.60 |
2752.00 |
26777.00 |
37451.00 |
97491.00 |
684723.00 |
▇▂▁▁▁ |
| away_player_5 |
0 |
1.00 |
87964.06 |
95279.45 |
2790.00 |
31291.00 |
40006.00 |
111865.00 |
693138.00 |
▇▂▁▁▁ |
| away_player_6 |
0 |
1.00 |
82426.76 |
88614.62 |
2802.00 |
30889.00 |
38746.00 |
109898.00 |
722766.00 |
▇▂▁▁▁ |
| away_player_7 |
0 |
1.00 |
82675.95 |
89581.30 |
2802.00 |
30655.00 |
38818.00 |
111019.50 |
750435.00 |
▇▂▁▁▁ |
| away_player_8 |
0 |
1.00 |
88887.74 |
101262.21 |
2802.00 |
30871.00 |
38807.00 |
108568.00 |
710807.00 |
▇▂▁▁▁ |
| away_player_9 |
0 |
1.00 |
88430.10 |
102085.92 |
2802.00 |
30893.00 |
38433.00 |
111990.00 |
722766.00 |
▇▂▁▁▁ |
| away_player_10 |
0 |
1.00 |
88475.45 |
102987.62 |
2770.00 |
30853.00 |
38570.00 |
110189.00 |
722766.00 |
▇▂▁▁▁ |
| away_player_11 |
0 |
1.00 |
77186.12 |
89655.95 |
2802.00 |
30830.00 |
38133.50 |
96509.00 |
696365.00 |
▇▂▁▁▁ |
| B365H |
1 |
1.00 |
2.73 |
2.12 |
1.04 |
1.67 |
2.10 |
2.80 |
26.00 |
▇▁▁▁▁ |
| B365D |
1 |
1.00 |
4.06 |
1.46 |
2.50 |
3.30 |
3.50 |
4.20 |
17.00 |
▇▁▁▁▁ |
| B365A |
1 |
1.00 |
5.07 |
4.63 |
1.08 |
2.55 |
3.60 |
5.50 |
41.00 |
▇▁▁▁▁ |
| BWH |
2 |
1.00 |
2.65 |
1.95 |
1.03 |
1.65 |
2.10 |
2.75 |
34.00 |
▇▁▁▁▁ |
| BWD |
2 |
1.00 |
3.94 |
1.34 |
2.40 |
3.25 |
3.40 |
4.00 |
19.50 |
▇▁▁▁▁ |
| BWA |
2 |
1.00 |
4.74 |
4.08 |
1.10 |
2.50 |
3.50 |
5.25 |
51.00 |
▇▁▁▁▁ |
| IWH |
6 |
1.00 |
2.56 |
1.73 |
1.05 |
1.65 |
2.10 |
2.60 |
20.00 |
▇▁▁▁▁ |
| IWD |
6 |
1.00 |
3.79 |
1.01 |
2.50 |
3.30 |
3.40 |
3.90 |
11.00 |
▇▁▁▁▁ |
| IWA |
6 |
1.00 |
4.43 |
3.46 |
1.10 |
2.50 |
3.30 |
4.90 |
25.00 |
▇▁▁▁▁ |
| LBH |
3 |
1.00 |
2.64 |
1.91 |
1.04 |
1.66 |
2.10 |
2.75 |
26.00 |
▇▁▁▁▁ |
| LBD |
3 |
1.00 |
3.92 |
1.33 |
2.38 |
3.30 |
3.40 |
4.00 |
19.00 |
▇▁▁▁▁ |
| LBA |
3 |
1.00 |
4.72 |
4.10 |
1.10 |
2.50 |
3.50 |
5.00 |
51.00 |
▇▁▁▁▁ |
| PSH |
3044 |
0.50 |
2.89 |
2.53 |
1.04 |
1.66 |
2.15 |
2.99 |
36.00 |
▇▁▁▁▁ |
| PSD |
3044 |
0.50 |
4.41 |
2.15 |
3.04 |
3.44 |
3.69 |
4.44 |
29.00 |
▇▁▁▁▁ |
| PSA |
3044 |
0.50 |
5.47 |
5.41 |
1.09 |
2.55 |
3.78 |
5.82 |
47.50 |
▇▁▁▁▁ |
| WHH |
1 |
1.00 |
2.70 |
2.03 |
1.02 |
1.67 |
2.10 |
2.75 |
26.00 |
▇▁▁▁▁ |
| WHD |
1 |
1.00 |
3.81 |
1.23 |
2.38 |
3.20 |
3.30 |
3.80 |
17.00 |
▇▁▁▁▁ |
| WHA |
1 |
1.00 |
4.92 |
4.60 |
1.08 |
2.50 |
3.50 |
5.50 |
51.00 |
▇▁▁▁▁ |
| SJH |
1452 |
0.76 |
2.68 |
2.02 |
1.04 |
1.67 |
2.10 |
2.70 |
23.00 |
▇▁▁▁▁ |
| SJD |
1452 |
0.76 |
3.98 |
1.32 |
2.70 |
3.30 |
3.50 |
4.00 |
15.00 |
▇▁▁▁▁ |
| SJA |
1452 |
0.76 |
5.07 |
4.68 |
1.13 |
2.60 |
3.60 |
5.50 |
41.00 |
▇▁▁▁▁ |
| VCH |
2 |
1.00 |
2.79 |
2.33 |
1.03 |
1.67 |
2.10 |
2.88 |
36.00 |
▇▁▁▁▁ |
| VCD |
2 |
1.00 |
4.13 |
1.69 |
2.50 |
3.30 |
3.50 |
4.20 |
26.00 |
▇▁▁▁▁ |
| VCA |
2 |
1.00 |
5.31 |
5.48 |
1.08 |
2.60 |
3.60 |
5.50 |
67.00 |
▇▁▁▁▁ |
| GBH |
2288 |
0.62 |
2.59 |
1.78 |
1.05 |
1.67 |
2.10 |
2.60 |
17.00 |
▇▁▁▁▁ |
| GBD |
2288 |
0.62 |
3.83 |
1.10 |
2.75 |
3.25 |
3.40 |
3.80 |
11.00 |
▇▁▁▁▁ |
| GBA |
2288 |
0.62 |
4.63 |
3.67 |
1.12 |
2.60 |
3.50 |
5.00 |
34.00 |
▇▁▁▁▁ |
| BSH |
2281 |
0.62 |
2.59 |
1.80 |
1.04 |
1.67 |
2.10 |
2.62 |
17.00 |
▇▁▁▁▁ |
| BSD |
2281 |
0.62 |
3.82 |
1.09 |
2.75 |
3.25 |
3.40 |
3.80 |
13.00 |
▇▁▁▁▁ |
| BSA |
2281 |
0.62 |
4.73 |
3.94 |
1.14 |
2.60 |
3.50 |
5.00 |
34.00 |
▇▁▁▁▁ |
| on_target_shot_home_team |
0 |
1.00 |
4.88 |
3.98 |
0.00 |
1.00 |
5.00 |
7.00 |
26.00 |
▇▅▁▁▁ |
| on_target_shot_away_team |
0 |
1.00 |
3.86 |
3.31 |
0.00 |
1.00 |
4.00 |
6.00 |
19.00 |
▇▆▂▁▁ |
| off_target_shot_home_team |
0 |
1.00 |
4.91 |
3.80 |
0.00 |
1.00 |
5.00 |
7.00 |
21.00 |
▇▆▂▁▁ |
| off_target_shot_away_team |
0 |
1.00 |
3.87 |
3.15 |
0.00 |
1.00 |
4.00 |
6.00 |
19.00 |
▇▇▂▁▁ |
| foul_home_team |
0 |
1.00 |
8.98 |
6.11 |
0.00 |
5.00 |
10.00 |
13.00 |
32.00 |
▆▇▆▁▁ |
| foul_away_team |
0 |
1.00 |
9.35 |
6.27 |
0.00 |
5.00 |
10.00 |
14.00 |
37.00 |
▆▇▃▁▁ |
| yellow_card_home_team |
0 |
1.00 |
1.98 |
1.45 |
0.00 |
1.00 |
2.00 |
3.00 |
9.00 |
▇▇▂▁▁ |
| yellow_card_away_team |
0 |
1.00 |
2.33 |
1.51 |
0.00 |
1.00 |
2.00 |
3.00 |
10.00 |
▇▅▁▁▁ |
| red_card_home_team |
0 |
1.00 |
0.05 |
0.24 |
0.00 |
0.00 |
0.00 |
0.00 |
2.00 |
▇▁▁▁▁ |
| red_card_away_team |
0 |
1.00 |
0.07 |
0.26 |
0.00 |
0.00 |
0.00 |
0.00 |
2.00 |
▇▁▁▁▁ |
| crosses_home_team |
0 |
1.00 |
15.35 |
11.27 |
0.00 |
6.00 |
16.00 |
23.00 |
72.00 |
▇▇▂▁▁ |
| crosses_away_team |
0 |
1.00 |
11.87 |
9.16 |
0.00 |
4.00 |
12.00 |
18.00 |
55.00 |
▇▆▂▁▁ |
| corner_home_team |
0 |
1.00 |
4.64 |
3.72 |
0.00 |
1.00 |
5.00 |
7.00 |
20.00 |
▇▆▂▁▁ |
| corner_away_team |
0 |
1.00 |
3.59 |
3.05 |
0.00 |
0.00 |
3.00 |
6.00 |
19.00 |
▇▆▂▁▁ |
| possession_home_team |
0 |
1.00 |
52.98 |
8.58 |
4.00 |
48.00 |
53.00 |
59.00 |
83.00 |
▁▁▅▇▁ |
| possession_away_team |
0 |
1.00 |
47.02 |
8.58 |
17.00 |
41.00 |
47.00 |
52.00 |
96.00 |
▁▇▅▁▁ |
| H_Age |
0 |
1.00 |
27.27 |
1.38 |
23.26 |
26.35 |
27.28 |
28.18 |
32.24 |
▁▆▇▂▁ |
| A_Age |
0 |
1.00 |
27.25 |
1.35 |
22.65 |
26.32 |
27.25 |
28.16 |
32.29 |
▁▅▇▃▁ |
# Summary statistics of the whole data set: min, quartiles, mean, max and NA
# counts for numeric columns; length, class and mode for character columns.
summary(matches)
## country_id league_id season stage
## Min. : 1729 Min. : 1729 Length:6080 Min. : 1.0
## 1st Qu.: 1729 1st Qu.: 1729 Class :character 1st Qu.:10.0
## Median :11624 Median :11624 Mode :character Median :19.5
## Mean :11624 Mean :11624 Mean :19.5
## 3rd Qu.:21518 3rd Qu.:21518 3rd Qu.:29.0
## Max. :21518 Max. :21518 Max. :38.0
##
## date match_api_id home_team_api_id away_team_api_id
## Length:6080 Min. : 489042 Min. : 7869 Min. : 7869
## Class :character 1st Qu.: 801150 1st Qu.: 8472 1st Qu.: 8472
## Mode :character Median :1140129 Median : 8654 Median : 8654
## Mean :1196727 Mean : 9087 Mean : 9087
## 3rd Qu.:1560639 3rd Qu.: 9869 3rd Qu.: 9869
## Max. :2030537 Max. :10281 Max. :10281
##
## home_team_goal away_team_goal home_player_X1 home_player_X2
## Min. : 0.000 Min. :0.000 Min. :1 Min. :1.000
## 1st Qu.: 1.000 1st Qu.:0.000 1st Qu.:1 1st Qu.:2.000
## Median : 1.000 Median :1.000 Median :1 Median :2.000
## Mean : 1.591 Mean :1.148 Mean :1 Mean :2.023
## 3rd Qu.: 2.000 3rd Qu.:2.000 3rd Qu.:1 3rd Qu.:2.000
## Max. :10.000 Max. :8.000 Max. :1 Max. :8.000
##
## home_player_X3 home_player_X4 home_player_X5 home_player_X6
## Min. :1.000 Min. :2.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:6.000 1st Qu.:8.000 1st Qu.:2.000
## Median :4.000 Median :6.000 Median :8.000 Median :4.000
## Mean :4.023 Mean :6.009 Mean :7.852 Mean :3.188
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:8.000 3rd Qu.:4.000
## Max. :8.000 Max. :8.000 Max. :9.000 Max. :9.000
##
## home_player_X7 home_player_X8 home_player_X9 home_player_X10
## Min. :2.000 Min. :2.000 Min. :1.000 Min. :3.00
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:5.000 1st Qu.:4.00
## Median :5.000 Median :5.000 Median :5.000 Median :5.00
## Mean :4.944 Mean :4.733 Mean :5.932 Mean :5.75
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:8.000 3rd Qu.:7.00
## Max. :8.000 Max. :9.000 Max. :9.000 Max. :9.00
##
## home_player_X11 away_player_X1 away_player_X2 away_player_X3 away_player_X4
## Min. :1.00 Min. :1 Min. :1.000 Min. :2.000 Min. :1.000
## 1st Qu.:5.00 1st Qu.:1 1st Qu.:2.000 1st Qu.:4.000 1st Qu.:6.000
## Median :5.00 Median :1 Median :2.000 Median :4.000 Median :6.000
## Mean :5.54 Mean :1 Mean :2.027 Mean :4.029 Mean :6.013
## 3rd Qu.:6.00 3rd Qu.:1 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:6.000
## Max. :7.00 Max. :2 Max. :8.000 Max. :8.000 Max. :8.000
##
## away_player_X5 away_player_X6 away_player_X7 away_player_X8 away_player_X9
## Min. :1.000 Min. :1.00 Min. :2.000 Min. :2.000 Min. :1.000
## 1st Qu.:8.000 1st Qu.:2.00 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:5.000
## Median :8.000 Median :4.00 Median :5.000 Median :5.000 Median :5.000
## Mean :7.808 Mean :3.21 Mean :4.906 Mean :4.727 Mean :5.849
## 3rd Qu.:8.000 3rd Qu.:4.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:8.000
## Max. :9.000 Max. :9.00 Max. :8.000 Max. :9.000 Max. :9.000
##
## away_player_X10 away_player_X11 home_player_Y1 home_player_Y2 home_player_Y3
## Min. :1.000 Min. :3.00 Min. :1 Min. :3 Min. :3
## 1st Qu.:4.000 1st Qu.:5.00 1st Qu.:1 1st Qu.:3 1st Qu.:3
## Median :7.000 Median :5.00 Median :1 Median :3 Median :3
## Mean :5.907 Mean :5.52 Mean :1 Mean :3 Mean :3
## 3rd Qu.:7.000 3rd Qu.:6.00 3rd Qu.:1 3rd Qu.:3 3rd Qu.:3
## Max. :9.000 Max. :7.00 Max. :1 Max. :3 Max. :3
## NA's :1
## home_player_Y4 home_player_Y5 home_player_Y6 home_player_Y7 home_player_Y8
## Min. :3 Min. :3.000 Min. :3.000 Min. :5.000 Min. :3.000
## 1st Qu.:3 1st Qu.:3.000 1st Qu.:6.000 1st Qu.:6.000 1st Qu.:7.000
## Median :3 Median :3.000 Median :6.000 Median :7.000 Median :7.000
## Mean :3 Mean :3.066 Mean :6.447 Mean :6.561 Mean :7.437
## 3rd Qu.:3 3rd Qu.:3.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:8.000
## Max. :3 Max. :7.000 Max. :8.000 Max. :8.000 Max. :9.000
##
## home_player_Y9 home_player_Y10 home_player_Y11 away_player_Y1
## Min. : 6.000 Min. : 6.000 Min. : 1.00 Min. :1
## 1st Qu.: 7.000 1st Qu.: 8.000 1st Qu.:10.00 1st Qu.:1
## Median : 8.000 Median : 9.000 Median :11.00 Median :1
## Mean : 7.844 Mean : 8.891 Mean :10.58 Mean :1
## 3rd Qu.: 8.000 3rd Qu.:10.000 3rd Qu.:11.00 3rd Qu.:1
## Max. :10.000 Max. :11.000 Max. :11.00 Max. :3
##
## away_player_Y2 away_player_Y3 away_player_Y4 away_player_Y5 away_player_Y6
## Min. :3 Min. :3 Min. :3 Min. :3.000 Min. :3.000
## 1st Qu.:3 1st Qu.:3 1st Qu.:3 1st Qu.:3.000 1st Qu.:6.000
## Median :3 Median :3 Median :3 Median :3.000 Median :6.000
## Mean :3 Mean :3 Mean :3 Mean :3.084 Mean :6.435
## 3rd Qu.:3 3rd Qu.:3 3rd Qu.:3 3rd Qu.:3.000 3rd Qu.:7.000
## Max. :3 Max. :3 Max. :3 Max. :7.000 Max. :8.000
##
## away_player_Y7 away_player_Y8 away_player_Y9 away_player_Y10
## Min. :3.000 Min. :5.000 Min. : 6.000 Min. : 6.000
## 1st Qu.:6.000 1st Qu.:7.000 1st Qu.: 7.000 1st Qu.: 8.000
## Median :7.000 Median :7.000 Median : 8.000 Median : 8.000
## Mean :6.577 Mean :7.437 Mean : 7.875 Mean : 8.792
## 3rd Qu.:7.000 3rd Qu.:8.000 3rd Qu.: 8.000 3rd Qu.:10.000
## Max. :8.000 Max. :9.000 Max. :10.000 Max. :11.000
##
## away_player_Y11 home_player_1 home_player_2 home_player_3
## Min. : 8.00 Min. : 2984 Min. : 2802 Min. : 2752
## 1st Qu.:10.00 1st Qu.: 30660 1st Qu.: 31303 1st Qu.: 26209
## Median :11.00 Median : 34382 Median : 37754 Median : 34193
## Mean :10.61 Mean : 63474 Mean : 91024 Mean : 75413
## 3rd Qu.:11.00 3rd Qu.: 69650 3rd Qu.:150466 3rd Qu.: 77741
## Max. :11.00 Max. :532942 Max. :690308 Max. :643570
## NA's :1
## home_player_4 home_player_5 home_player_6 home_player_7
## Min. : 2752 Min. : 2752 Min. : 2802 Min. : 2802
## 1st Qu.: 27668 1st Qu.: 31921 1st Qu.: 30893 1st Qu.: 30598
## Median : 37440 Median : 40006 Median : 38469 Median : 38609
## Mean : 76323 Mean : 88640 Mean : 81478 Mean : 82099
## 3rd Qu.: 93458 3rd Qu.:111930 3rd Qu.:109621 3rd Qu.:109621
## Max. :580589 Max. :693138 Max. :722766 Max. :683450
##
## home_player_8 home_player_9 home_player_10 home_player_11
## Min. : 2802 Min. : 2770 Min. : 2802 Min. : 2802
## 1st Qu.: 30876 1st Qu.: 30892 1st Qu.: 30840 1st Qu.: 30830
## Median : 38807 Median : 38433 Median : 38460 Median : 38044
## Mean : 87450 Mean : 87783 Mean : 86420 Mean : 75563
## 3rd Qu.:109058 3rd Qu.:114031 3rd Qu.:109491 3rd Qu.: 75445
## Max. :683450 Max. :722766 Max. :742405 Max. :696365
##
## away_player_1 away_player_2 away_player_3 away_player_4
## Min. : 2796 Min. : 2790 Min. : 2752 Min. : 2752
## 1st Qu.: 30657 1st Qu.: 31306 1st Qu.: 26552 1st Qu.: 26777
## Median : 33986 Median : 38067 Median : 34193 Median : 37451
## Mean : 63649 Mean : 91853 Mean : 73554 Mean : 78356
## 3rd Qu.: 69650 3rd Qu.:150480 3rd Qu.: 75395 3rd Qu.: 97491
## Max. :532942 Max. :706985 Max. :643570 Max. :684723
##
## away_player_5 away_player_6 away_player_7 away_player_8
## Min. : 2790 Min. : 2802 Min. : 2802 Min. : 2802
## 1st Qu.: 31291 1st Qu.: 30889 1st Qu.: 30655 1st Qu.: 30871
## Median : 40006 Median : 38746 Median : 38818 Median : 38807
## Mean : 87964 Mean : 82427 Mean : 82676 Mean : 88888
## 3rd Qu.:111865 3rd Qu.:109898 3rd Qu.:111020 3rd Qu.:108568
## Max. :693138 Max. :722766 Max. :750435 Max. :710807
##
## away_player_9 away_player_10 away_player_11 goal
## Min. : 2802 Min. : 2770 Min. : 2802 Length:6080
## 1st Qu.: 30893 1st Qu.: 30853 1st Qu.: 30830 Class :character
## Median : 38433 Median : 38570 Median : 38134 Mode :character
## Mean : 88430 Mean : 88476 Mean : 77186
## 3rd Qu.:111990 3rd Qu.:110189 3rd Qu.: 96509
## Max. :722766 Max. :722766 Max. :696365
##
## shoton shotoff foulcommit card
## Length:6080 Length:6080 Length:6080 Length:6080
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## cross corner possession B365H
## Length:6080 Length:6080 Length:6080 Min. : 1.040
## Class :character Class :character Class :character 1st Qu.: 1.670
## Mode :character Mode :character Mode :character Median : 2.100
## Mean : 2.732
## 3rd Qu.: 2.800
## Max. :26.000
## NA's :1
## B365D B365A BWH BWD
## Min. : 2.500 Min. : 1.080 Min. : 1.030 Min. : 2.400
## 1st Qu.: 3.300 1st Qu.: 2.550 1st Qu.: 1.650 1st Qu.: 3.250
## Median : 3.500 Median : 3.600 Median : 2.100 Median : 3.400
## Mean : 4.057 Mean : 5.068 Mean : 2.653 Mean : 3.937
## 3rd Qu.: 4.200 3rd Qu.: 5.500 3rd Qu.: 2.750 3rd Qu.: 4.000
## Max. :17.000 Max. :41.000 Max. :34.000 Max. :19.500
## NA's :1 NA's :1 NA's :2 NA's :2
## BWA IWH IWD IWA
## Min. : 1.100 Min. : 1.050 Min. : 2.500 Min. : 1.10
## 1st Qu.: 2.500 1st Qu.: 1.650 1st Qu.: 3.300 1st Qu.: 2.50
## Median : 3.500 Median : 2.100 Median : 3.400 Median : 3.30
## Mean : 4.738 Mean : 2.558 Mean : 3.786 Mean : 4.43
## 3rd Qu.: 5.250 3rd Qu.: 2.600 3rd Qu.: 3.900 3rd Qu.: 4.90
## Max. :51.000 Max. :20.000 Max. :11.000 Max. :25.00
## NA's :2 NA's :6 NA's :6 NA's :6
## LBH LBD LBA PSH
## Min. : 1.040 Min. : 2.380 Min. : 1.100 Min. : 1.04
## 1st Qu.: 1.660 1st Qu.: 3.300 1st Qu.: 2.500 1st Qu.: 1.66
## Median : 2.100 Median : 3.400 Median : 3.500 Median : 2.15
## Mean : 2.638 Mean : 3.925 Mean : 4.718 Mean : 2.89
## 3rd Qu.: 2.750 3rd Qu.: 4.000 3rd Qu.: 5.000 3rd Qu.: 2.99
## Max. :26.000 Max. :19.000 Max. :51.000 Max. :36.00
## NA's :3 NA's :3 NA's :3 NA's :3044
## PSD PSA WHH WHD
## Min. : 3.040 Min. : 1.090 Min. : 1.020 Min. : 2.380
## 1st Qu.: 3.440 1st Qu.: 2.550 1st Qu.: 1.670 1st Qu.: 3.200
## Median : 3.690 Median : 3.780 Median : 2.100 Median : 3.300
## Mean : 4.409 Mean : 5.466 Mean : 2.698 Mean : 3.806
## 3rd Qu.: 4.440 3rd Qu.: 5.822 3rd Qu.: 2.750 3rd Qu.: 3.800
## Max. :29.000 Max. :47.500 Max. :26.000 Max. :17.000
## NA's :3044 NA's :3044 NA's :1 NA's :1
## WHA SJH SJD SJA
## Min. : 1.080 Min. : 1.040 Min. : 2.700 Min. : 1.130
## 1st Qu.: 2.500 1st Qu.: 1.670 1st Qu.: 3.300 1st Qu.: 2.600
## Median : 3.500 Median : 2.100 Median : 3.500 Median : 3.600
## Mean : 4.924 Mean : 2.683 Mean : 3.983 Mean : 5.071
## 3rd Qu.: 5.500 3rd Qu.: 2.700 3rd Qu.: 4.000 3rd Qu.: 5.500
## Max. :51.000 Max. :23.000 Max. :15.000 Max. :41.000
## NA's :1 NA's :1452 NA's :1452 NA's :1452
## VCH VCD VCA GBH
## Min. : 1.030 Min. : 2.500 Min. : 1.080 Min. : 1.050
## 1st Qu.: 1.670 1st Qu.: 3.300 1st Qu.: 2.600 1st Qu.: 1.670
## Median : 2.100 Median : 3.500 Median : 3.600 Median : 2.100
## Mean : 2.786 Mean : 4.134 Mean : 5.307 Mean : 2.593
## 3rd Qu.: 2.880 3rd Qu.: 4.200 3rd Qu.: 5.500 3rd Qu.: 2.600
## Max. :36.000 Max. :26.000 Max. :67.000 Max. :17.000
## NA's :2 NA's :2 NA's :2 NA's :2288
## GBD GBA BSH BSD
## Min. : 2.750 Min. : 1.12 Min. : 1.040 Min. : 2.750
## 1st Qu.: 3.250 1st Qu.: 2.60 1st Qu.: 1.670 1st Qu.: 3.250
## Median : 3.400 Median : 3.50 Median : 2.100 Median : 3.400
## Mean : 3.825 Mean : 4.63 Mean : 2.593 Mean : 3.822
## 3rd Qu.: 3.800 3rd Qu.: 5.00 3rd Qu.: 2.620 3rd Qu.: 3.800
## Max. :11.000 Max. :34.00 Max. :17.000 Max. :13.000
## NA's :2288 NA's :2288 NA's :2281 NA's :2281
## BSA on_target_shot_home_team on_target_shot_away_team
## Min. : 1.140 Min. : 0.000 Min. : 0.000
## 1st Qu.: 2.600 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 3.500 Median : 5.000 Median : 4.000
## Mean : 4.733 Mean : 4.879 Mean : 3.862
## 3rd Qu.: 5.000 3rd Qu.: 7.000 3rd Qu.: 6.000
## Max. :34.000 Max. :26.000 Max. :19.000
## NA's :2281
## off_target_shot_home_team off_target_shot_away_team foul_home_team
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 5.000
## Median : 5.000 Median : 4.000 Median :10.000
## Mean : 4.907 Mean : 3.865 Mean : 8.983
## 3rd Qu.: 7.000 3rd Qu.: 6.000 3rd Qu.:13.000
## Max. :21.000 Max. :19.000 Max. :32.000
##
## foul_away_team yellow_card_home_team yellow_card_away_team
## Min. : 0.000 Min. :0.000 Min. : 0.000
## 1st Qu.: 5.000 1st Qu.:1.000 1st Qu.: 1.000
## Median :10.000 Median :2.000 Median : 2.000
## Mean : 9.346 Mean :1.981 Mean : 2.331
## 3rd Qu.:14.000 3rd Qu.:3.000 3rd Qu.: 3.000
## Max. :37.000 Max. :9.000 Max. :10.000
##
## red_card_home_team red_card_away_team crosses_home_team crosses_away_team
## Min. :0.00000 Min. :0.00000 Min. : 0.00 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 6.00 1st Qu.: 4.00
## Median :0.00000 Median :0.00000 Median :16.00 Median :12.00
## Mean :0.05493 Mean :0.06546 Mean :15.35 Mean :11.87
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:23.00 3rd Qu.:18.00
## Max. :2.00000 Max. :2.00000 Max. :72.00 Max. :55.00
##
## corner_home_team corner_away_team possession_home_team possession_away_team
## Min. : 0.000 Min. : 0.000 Min. : 4.00 Min. :17.00
## 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:48.00 1st Qu.:41.00
## Median : 5.000 Median : 3.000 Median :53.00 Median :47.00
## Mean : 4.641 Mean : 3.595 Mean :52.98 Mean :47.02
## 3rd Qu.: 7.000 3rd Qu.: 6.000 3rd Qu.:59.00 3rd Qu.:52.00
## Max. :20.000 Max. :19.000 Max. :83.00 Max. :96.00
##
## H_Age A_Age Country League
## Min. :23.26 Min. :22.65 Length:6080 Length:6080
## 1st Qu.:26.35 1st Qu.:26.32 Class :character Class :character
## Median :27.28 Median :27.25 Mode :character Mode :character
## Mean :27.27 Mean :27.25
## 3rd Qu.:28.18 3rd Qu.:28.16
## Max. :32.24 Max. :32.29
##
## HomeTeam AwayTeam
## Length:6080 Length:6080
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Compact overview of `matches`: row and column counts, then one line per
# column showing its type and its first few values.
glimpse(matches)
## Rows: 6,080
## Columns: 136
## $ country_id <int> 1729, 1729, 1729, 1729, 1729, 1729, 1729, 17…
## $ league_id <int> 1729, 1729, 1729, 1729, 1729, 1729, 1729, 17…
## $ season <chr> "2008/2009", "2008/2009", "2008/2009", "2008…
## $ stage <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 10, 10…
## $ date <chr> "8/17/2008", "8/16/2008", "8/16/2008", "8/16…
## $ match_api_id <int> 489042, 489043, 489044, 489045, 489046, 4890…
## $ home_team_api_id <int> 10260, 9825, 8472, 8654, 10252, 8668, 8549, …
## $ away_team_api_id <int> 10261, 8659, 8650, 8528, 8456, 8655, 8586, 1…
## $ home_team_goal <int> 1, 1, 0, 2, 4, 2, 2, 3, 2, 4, 2, 4, 2, 0, 0,…
## $ away_team_goal <int> 1, 0, 1, 1, 2, 3, 1, 1, 1, 0, 0, 4, 0, 1, 3,…
## $ home_player_X1 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ home_player_X2 <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ home_player_X3 <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ home_player_X4 <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ home_player_X5 <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,…
## $ home_player_X6 <int> 2, 2, 2, 2, 2, 1, 2, 1, 2, 3, 2, 2, 2, 2, 3,…
## $ home_player_X7 <int> 4, 4, 4, 4, 4, 3, 4, 3, 4, 5, 4, 4, 4, 4, 5,…
## $ home_player_X8 <int> 6, 6, 6, 6, 6, 5, 6, 5, 6, 7, 6, 6, 6, 6, 7,…
## $ home_player_X9 <int> 8, 8, 8, 8, 8, 7, 8, 7, 8, 4, 8, 8, 8, 8, 5,…
## $ home_player_X10 <int> 4, 4, 4, 4, 4, 9, 4, 9, 4, 6, 4, 4, 4, 4, 4,…
## $ home_player_X11 <int> 6, 6, 6, 6, 6, 5, 6, 5, 6, 5, 6, 6, 6, 6, 6,…
## $ away_player_X1 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ away_player_X2 <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ away_player_X3 <int> 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ away_player_X4 <int> 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ away_player_X5 <int> 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,…
## $ away_player_X6 <int> 2, 5, 2, 2, 1, 4, 2, 2, 2, 2, 3, 2, 5, 2, 3,…
## $ away_player_X7 <int> 4, 7, 4, 4, 3, 6, 4, 4, 4, 4, 5, 4, 2, 4, 5,…
## $ away_player_X8 <int> 6, 9, 6, 6, 5, 8, 6, 6, 6, 6, 7, 6, 4, 6, 7,…
## $ away_player_X9 <int> 8, 1, 8, 8, 7, 2, 8, 8, 8, 8, 3, 8, 6, 8, 3,…
## $ away_player_X10 <int> 5, 3, 4, 4, 9, 6, 4, 4, 4, 4, 5, 5, 8, 4, 5,…
## $ away_player_X11 <int> 5, 5, 6, 6, 5, 4, 6, 6, 6, 6, 7, 5, 5, 6, 7,…
## $ home_player_Y1 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ home_player_Y2 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y3 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y4 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y5 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y6 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 5,…
## $ home_player_Y7 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 5,…
## $ home_player_Y8 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 5,…
## $ home_player_Y9 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 8,…
## $ home_player_Y10 <int> 10, 10, 10, 10, 10, 7, 10, 7, 10, 8, 10, 10,…
## $ home_player_Y11 <int> 10, 10, 10, 10, 10, 11, 10, 11, 10, 11, 10, …
## $ away_player_Y1 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ away_player_Y2 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y3 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y4 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y5 <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y6 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 7, 7,…
## $ away_player_Y7 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,…
## $ away_player_Y8 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,…
## $ away_player_Y9 <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 7, 7, 7, 1…
## $ away_player_Y10 <int> 9, 7, 10, 10, 7, 10, 10, 10, 10, 10, 10, 9, …
## $ away_player_Y11 <int> 11, 11, 10, 10, 11, 10, 10, 10, 10, 10, 10, …
## $ home_player_1 <int> 30726, 23686, 32562, 36374, 30380, 31465, 35…
## $ home_player_2 <int> 30362, 26111, 38836, 30966, 30357, 30371, 24…
## $ home_player_3 <int> 30620, 38835, 24446, 23818, 24658, 24004, 24…
## $ home_player_4 <int> 30865, 30986, 24408, 37277, 43280, 33086, 38…
## $ home_player_5 <int> 32569, 31291, 36786, 30687, 23282, 30857, 24…
## $ home_player_6 <int> 24148, 31013, 38802, 36394, 38609, 24011, 24…
## $ home_player_7 <int> 34944, 30935, 24655, 37169, 24780, 109058, 9…
## $ home_player_8 <int> 30373, 39297, 17866, 24223, 23782, 23268, 24…
## $ home_player_9 <int> 24154, 26181, 30352, 24773, 23354, 24846, 30…
## $ home_player_10 <int> 24157, 30960, 23927, 34543, 23264, 24006, 35…
## $ home_player_11 <int> 30829, 36410, 24410, 23139, 26165, 24160, 42…
## $ away_player_1 <int> 24224, 36373, 30660, 34421, 31432, 30622, 30…
## $ away_player_2 <int> 25518, 36832, 37442, 34987, 46403, 37764, 34…
## $ away_player_3 <int> 24228, 23115, 30617, 35472, 24208, 19020, 38…
## $ away_player_4 <int> 30929, 37280, 24134, 111865, 23939, 23921, 2…
## $ away_player_5 <int> 29581, 24728, 414792, 25005, 33963, 24136, 4…
## $ away_player_6 <int> 38807, 24664, 37139, 35327, 47413, 30342, 30…
## $ away_player_7 <int> 40565, 31088, 30618, 25150, 40198, 23889, 30…
## $ away_player_8 <int> 30360, 23257, 40701, 97988, 42119, 23916, 31…
## $ away_player_9 <int> 33852, 24171, 24800, 41877, 222222, 23922, 2…
## $ away_player_10 <int> 34574, 25922, 24635, 127857, 33633, 34176, 4…
## $ away_player_11 <int> 37799, 27267, 30853, 34466, 107216, 30646, 2…
## $ goal <chr> "<goal><value><comment>n</comment><stats><go…
## $ shoton <chr> "<shoton><value><stats><blocked>1</blocked><…
## $ shotoff <chr> "<shotoff><value><stats><shotoff>1</shotoff>…
## $ foulcommit <chr> "<foulcommit><value><stats><foulscommitted>1…
## $ card <chr> "<card><value><comment>y</comment><stats><yc…
## $ cross <chr> "<cross><value><stats><crosses>1</crosses></…
## $ corner <chr> "<corner><value><stats><corners>1</corners><…
## $ possession <chr> "<possession><value><comment>56</comment><ev…
## $ B365H <dbl> 1.29, 1.20, 5.50, 1.91, 1.91, 2.00, 3.20, 1.…
## $ B365D <dbl> 5.50, 6.50, 3.60, 3.40, 3.40, 3.30, 3.40, 3.…
## $ B365A <dbl> 11.00, 15.00, 1.67, 4.20, 4.33, 4.00, 2.25, …
## $ BWH <dbl> 1.30, 1.22, 5.00, 1.90, 1.95, 1.85, 2.80, 1.…
## $ BWD <dbl> 4.75, 5.50, 3.35, 3.20, 3.20, 3.25, 3.20, 3.…
## $ BWA <dbl> 8.25, 10.00, 1.67, 3.80, 3.60, 4.00, 2.30, 4…
## $ IWH <dbl> 1.30, 1.20, 4.50, 1.80, 2.00, 2.00, 2.90, 1.…
## $ IWD <dbl> 4.4, 5.2, 3.5, 3.3, 3.2, 3.2, 3.2, 3.3, 3.2,…
## $ IWA <dbl> 8.50, 11.00, 1.65, 3.80, 3.30, 3.30, 2.20, 4…
## $ LBH <dbl> 1.25, 1.20, 4.50, 1.80, 1.83, 1.80, 2.80, 1.…
## $ LBD <dbl> 4.50, 5.00, 3.30, 3.20, 3.20, 3.20, 3.20, 3.…
## $ LBA <dbl> 10.00, 11.00, 1.67, 4.00, 3.75, 4.00, 2.20, …
## $ PSH <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ PSD <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ PSA <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ WHH <dbl> 1.25, 1.17, 5.50, 1.83, 1.91, 1.95, 2.90, 1.…
## $ WHD <dbl> 4.50, 5.50, 3.30, 3.20, 3.20, 3.10, 3.20, 3.…
## $ WHA <dbl> 10.00, 12.00, 1.57, 3.75, 3.50, 3.50, 2.15, …
## $ SJH <dbl> 1.25, 1.20, 4.33, 1.91, 1.91, 2.00, 2.88, 1.…
## $ SJD <dbl> 5.00, 5.50, 3.40, 3.25, 3.25, 3.25, 3.40, 3.…
## $ SJA <dbl> 10.00, 12.00, 1.73, 3.75, 3.75, 3.40, 2.20, …
## $ VCH <dbl> 1.28, 1.25, 5.50, 1.90, 1.90, 2.05, 3.20, 1.…
## $ VCD <dbl> 5.50, 6.00, 3.80, 3.50, 3.50, 3.30, 3.40, 3.…
## $ VCA <dbl> 12.00, 13.00, 1.65, 4.35, 4.35, 4.00, 2.30, …
## $ GBH <dbl> 1.30, 1.22, 5.00, 1.91, 1.91, 2.00, 3.00, 1.…
## $ GBD <dbl> 4.75, 5.50, 3.40, 3.25, 3.25, 3.25, 3.25, 3.…
## $ GBA <dbl> 10.00, 13.00, 1.70, 4.00, 4.00, 3.75, 2.30, …
## $ BSH <dbl> 1.29, 1.22, 4.50, 1.91, 1.91, 2.00, 2.80, 1.…
## $ BSD <dbl> 4.50, 5.00, 3.40, 3.25, 3.30, 3.25, 3.25, 3.…
## $ BSA <dbl> 11.00, 13.00, 1.73, 3.80, 3.75, 3.50, 2.30, …
## $ on_target_shot_home_team <int> 11, 12, 4, 5, 5, 2, 7, 5, 5, 6, 11, 9, 5, 13…
## $ on_target_shot_away_team <int> 1, 2, 11, 7, 9, 8, 2, 1, 4, 7, 3, 3, 11, 6, …
## $ off_target_shot_home_team <int> 10, 13, 3, 7, 4, 7, 5, 7, 5, 10, 8, 11, 2, 6…
## $ off_target_shot_away_team <int> 9, 3, 5, 15, 5, 8, 5, 6, 6, 5, 3, 5, 5, 4, 1…
## $ foul_home_team <int> 16, 11, 13, 14, 11, 11, 11, 14, 10, 11, 11, …
## $ foul_away_team <int> 11, 9, 12, 13, 13, 11, 11, 15, 9, 8, 15, 16,…
## $ yellow_card_home_team <int> 3, 0, 0, 2, 0, 2, 1, 1, 3, 0, 1, 1, 1, 1, 0,…
## $ yellow_card_away_team <int> 0, 0, 2, 1, 1, 2, 2, 2, 0, 1, 2, 4, 3, 2, 2,…
## $ red_card_home_team <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ red_card_away_team <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ crosses_home_team <int> 24, 21, 15, 15, 16, 14, 25, 22, 16, 25, 14, …
## $ crosses_away_team <int> 9, 7, 19, 27, 16, 21, 27, 29, 14, 9, 6, 14, …
## $ corner_home_team <int> 6, 7, 1, 6, 7, 3, 7, 4, 5, 8, 3, 9, 3, 5, 5,…
## $ corner_away_team <int> 6, 5, 8, 10, 8, 4, 9, 3, 6, 3, 1, 1, 3, 3, 4…
## $ possession_home_team <int> 55, 66, 46, 52, 52, 51, 53, 47, 53, 65, 60, …
## $ possession_away_team <int> 45, 34, 54, 48, 48, 49, 47, 53, 47, 35, 40, …
## $ H_Age <dbl> 28.57454, 23.97286, 26.75757, 26.17812, 27.5…
## $ A_Age <dbl> 26.40214, 25.60740, 27.40097, 26.64033, 23.7…
## $ Country <chr> "England", "England", "England", "England", …
## $ League <chr> "England Premier League", "England Premier L…
## $ HomeTeam <chr> "Manchester United", "Arsenal", "Sunderland"…
## $ AwayTeam <chr> "Newcastle United", "West Bromwich Albion", …
# Drop the columns this project does not need: the first two columns, then the
# ranges match_api_id..away_team_api_id, goal..BSA and
# home_player_X1..away_player_Y11.
matches <- matches[, -c(1, 2)]
matches <- matches %>%
  select(
    -(match_api_id:away_team_api_id),
    -(goal:BSA),
    -(home_player_X1:away_player_Y11)
  )
# Replace missing player ids (columns 6 to 27) with the placeholder id 222222.
# The placeholder is meant to match a fake player who has an age, height and
# weight, so that those attributes can later be substituted with the average
# age, height and weight of all players.
# `across()` replaces the superseded `mutate_at()`.
matches <- matches %>%
  mutate(across(6:27, ~ replace_na(.x, 222222)))
# Parse the date columns with lubridate: `date` in matches is read as
# month-day-year, `birthday` in players as year-month-day.
matches$date <- mdy(matches$date)
players$birthday <- ymd(players$birthday)
# Swap every player id column (columns 6 to 27) for the player's name.
# For each column: rename it to "player_api_id" so it matches the key in
# `players`, left-join the name, drop the id, and give the joined
# "player_name" column the original column name. The name columns therefore
# end up at the right-hand side of the data frame, in the original order.
nam <- names(matches)[6:27] # Names of the 22 player id columns.
player_lookup <- players[c("player_api_id", "player_name")]
# Iterating over the names (not 1:length(nam)) stays correct for an empty set.
for (player_col in nam) {
  names(matches)[names(matches) == player_col] <- "player_api_id"
  matches <- matches %>%
    left_join(player_lookup, by = "player_api_id") %>%
    select(-player_api_id) # Only the names are needed, not the ids.
  names(matches)[names(matches) == "player_name"] <- player_col
}
# The following steps were written in a separate file against a data frame
# called 'matchesF', so the result is kept under that name as well.
matchesF <- matches
# To merge matchesF with the manager and referee tables we need team names
# that agree across all tables. Some tables use a shorter form of the team
# name, so those names are rewritten to the full form here.
# Each entry maps a pattern (name) to its replacement (value). Patterns are
# applied in the listed order and str_replace() only rewrites the first match
# in each string.
epl_team_renames <- c(
  "Tottenham" = "Tottenham Hotspur",
  "Newcastle" = "Newcastle United",
  "West Ham" = "West Ham United",
  "Swansea" = "Swansea City",
  "Wigan" = "Wigan Athletic",
  "Norwich" = "Norwich City",
  "Wolverhampton" = "Wolverhampton Wanderers",
  "Birmingham" = "Birmingham City",
  "Leicester" = "Leicester City",
  "Cardiff" = "Cardiff City"
)
for (short_name in names(epl_team_renames)) {
  full_name <- epl_team_renames[[short_name]]
  eplref$Home <- str_replace(eplref$Home, short_name, full_name)
  eplref$Away <- str_replace(eplref$Away, short_name, full_name)
}
# Rewrite the LaLiga team names in the referee table (Home and Away columns)
# to the full names used for joining. Each entry maps a pattern (name) to its
# replacement (value); patterns are applied in the listed order and
# str_replace() only rewrites the first match in each string.
laliga_ref_renames <- c(
  "Real Madrid" = "Real Madrid CF",
  "Barcelona" = "FC Barcelona",
  "Málaga" = "Málaga CF",
  "Valencia" = "Valencia CF",
  "Atlético de Madrid" = "Atlético Madrid",
  "Getafe" = "Getafe CF",
  "Sevilla" = "Sevilla FC",
  "Espanyol" = "RCD Espanyol",
  "Athletic Club" = "Athletic Club de Bilbao",
  "Villarreal" = "Villarreal CF",
  "Osasuna" = "CA Osasuna",
  "Deportivo de La Coruña" = "RC Deportivo de La Coruña",
  "Levante" = "Levante UD",
  "Granada" = "Granada CF",
  "Sporting de Gijón" = "Real Sporting de Gijón",
  "Almería" = "UD Almería",
  "Mallorca" = "RCD Mallorca",
  "Betis" = "Real Betis Balompié",
  "Zaragoza" = "Real Zaragoza",
  "Valladolid" = "Real Valladolid",
  "Racing de Santander" = "Racing Santander",
  "Celta de Vigo" = "RC Celta de Vigo",
  "Elche" = "Elche CF",
  "Eibar" = "SD Eibar",
  "Córdoba" = "Córdoba CF",
  "Numancia" = "CD Numancia",
  "Las Palmas" = "UD Las Palmas",
  "Xerez" = "Xerez Club Deportivo",
  "Hércules" = "Hércules Club de Fútbol",
  "Tenerife" = "CD Tenerife",
  "Recreativo de Huelva" = "RC Recreativo"
)
for (ref_short_name in names(laliga_ref_renames)) {
  ref_full_name <- laliga_ref_renames[[ref_short_name]]
  laligaref$Home <- str_replace(laligaref$Home, ref_short_name, ref_full_name)
  laligaref$Away <- str_replace(laligaref$Away, ref_short_name, ref_full_name)
}
# Rewrite the LaLiga team names in the manager table to the full names used
# for joining. Each entry maps a pattern (name) to its replacement (value);
# patterns are applied in the listed order and str_replace() only rewrites the
# first match in each string. Entries whose pattern equals the replacement
# leave the name unchanged.
laliga_manager_renames <- c(
  "Real Madrid" = "Real Madrid CF",
  "Barcelona" = "FC Barcelona",
  "Málaga" = "Málaga CF",
  "Valencia" = "Valencia CF",
  "Atlético Madrid" = "Atlético Madrid",
  "Getafe" = "Getafe CF",
  "Sevilla" = "Sevilla FC",
  "Espanyol" = "RCD Espanyol",
  "Athletic Bilbao" = "Athletic Club de Bilbao",
  "Villarreal" = "Villarreal CF",
  "Osasuna" = "CA Osasuna",
  "Levante" = "Levante UD",
  "Granada" = "Granada CF",
  "Almería" = "UD Almería",
  "Mallorca" = "RCD Mallorca",
  "Zaragoza" = "Real Zaragoza",
  "Valladolid" = "Real Valladolid",
  "Racing Santander" = "Racing Santander",
  "Elche" = "Elche CF",
  "Eibar" = "SD Eibar",
  "Córdoba" = "Córdoba CF",
  "Numancia" = "CD Numancia",
  "Las Palmas" = "UD Las Palmas",
  "Xerez" = "Xerez Club Deportivo",
  "Hércules" = "Hércules Club de Fútbol",
  "Tenerife" = "CD Tenerife",
  "Recreativo" = "RC Recreativo"
)
for (manager_short_name in names(laliga_manager_renames)) {
  laligaman$Team <- str_replace(
    laligaman$Team,
    manager_short_name,
    laliga_manager_renames[[manager_short_name]]
  )
}
# Attach the England Premier League (EPL) manager of the home team to every
# match, matching on season and home team. The joined "Manager" column is
# renamed to "HomeManager".
matchesF <- left_join(
  matchesF,
  eplman,
  by = c("season" = "Season", "HomeTeam" = "Team")
)
names(matchesF)[names(matchesF) == "Manager"] <- "HomeManager"
# Same join for the away team; the joined "Manager" column becomes
# "AwayManager".
matchesF <- left_join(
  matchesF,
  eplman,
  by = c("season" = "Season", "AwayTeam" = "Team")
)
names(matchesF)[names(matchesF) == "Manager"] <- "AwayManager"
# Attach the Spain LIGA BBVA (LaLiga) manager of the home team to every match.
# The EPL join left HomeManager missing for LaLiga games, so instead of keeping
# one home-manager column per league, the LaLiga manager from the freshly
# joined "Manager" column is appended to HomeManager and "Manager" is dropped.
# Missing values are turned into "" only in these two columns: blanking every
# NA in matchesF would also convert any numeric column that contains NA into
# a character column. HomeManager is "" when neither table has a manager.
matchesF <- matchesF %>%
  left_join(laligaman, by = c("season" = "Season", "HomeTeam" = "Team")) %>%
  mutate(
    HomeManager = str_c(
      str_replace_na(HomeManager, ""),
      str_replace_na(Manager, "")
    )
  ) %>%
  select(-Manager)
# Attach the Spain LIGA BBVA (LaLiga) manager of the away team to every match.
# The LaLiga manager from the freshly joined "Manager" column is appended to
# AwayManager (missing for LaLiga games after the EPL join) and "Manager" is
# dropped afterwards.
# Missing values are turned into "" only in these two columns: blanking every
# NA in matchesF would also convert any numeric column that contains NA into
# a character column. AwayManager is "" when neither table has a manager.
matchesF <- matchesF %>%
  left_join(laligaman, by = c("season" = "Season", "AwayTeam" = "Team")) %>%
  mutate(
    AwayManager = str_c(
      str_replace_na(AwayManager, ""),
      str_replace_na(Manager, "")
    )
  ) %>%
  select(-Manager)
# Bring the date columns to a common type so the matches can be joined with
# the EPL and LaLiga referee tables: matchesF$date is parsed as
# year-month-day, the referee tables' Date columns as day-month-year.
matchesF$date <- ymd(matchesF$date)
eplref$Date <- dmy(eplref$Date)
laligaref$Date <- dmy(laligaref$Date)
# Attach the referee of every EPL match, matching on date, home team and away
# team. The joined "Referee" column is renamed to "Ref".
matchesF <- matchesF %>%
  left_join(
    eplref,
    by = c("date" = "Date", "HomeTeam" = "Home", "AwayTeam" = "Away")
  )
names(matchesF)[names(matchesF) == "Referee"] <- "Ref"
# Attach the referee of every LaLiga match the same way and append it to Ref,
# which is missing for LaLiga games after the EPL join; the joined "Referee"
# column is dropped afterwards.
# Missing values are turned into "" only in these two columns: blanking every
# NA in matchesF would also convert any numeric column that contains NA into
# a character column. Ref is "" when neither table has a referee.
matchesF <- matchesF %>%
  left_join(
    laligaref,
    by = c("date" = "Date", "HomeTeam" = "Home", "AwayTeam" = "Away")
  ) %>%
  mutate(
    Ref = str_c(str_replace_na(Ref, ""), str_replace_na(Referee, ""))
  ) %>%
  select(-Referee)
# Sort the matches by date, earliest first.
matchesF <- dplyr::arrange(matchesF, date)
# Add the column "FullTimeResult", which records the outcome of each game:
# "Home Team" or "Away Team" for a win, "Draw" when both sides scored the same.
# The comparison is vectorised over all matches, so it also works for an empty
# table; a match with a missing goal count gets NA instead of stopping the
# script.
matchesF$FullTimeResult <- ifelse(
  matchesF$home_team_goal > matchesF$away_team_goal,
  "Home Team",
  ifelse(
    matchesF$home_team_goal == matchesF$away_team_goal,
    "Draw",
    "Away Team"
  )
)
# Total number of goals (home plus away) scored per season in each league.
goalsPerSeason <- matchesF %>%
  group_by(season, League) %>%
  summarise(Goals = sum(home_team_goal + away_team_goal, na.rm = TRUE))
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
# Print the goal totals per season and league.
goalsPerSeason
## # A tibble: 16 × 3
## # Groups: season [8]
## season League Goals
## <chr> <chr> <int>
## 1 2008/2009 England Premier League 942
## 2 2008/2009 Spain LIGA BBVA 1101
## 3 2009/2010 England Premier League 1053
## 4 2009/2010 Spain LIGA BBVA 1031
## 5 2010/2011 England Premier League 1063
## 6 2010/2011 Spain LIGA BBVA 1042
## 7 2011/2012 England Premier League 1066
## 8 2011/2012 Spain LIGA BBVA 1050
## 9 2012/2013 England Premier League 1063
## 10 2012/2013 Spain LIGA BBVA 1091
## 11 2013/2014 England Premier League 1052
## 12 2013/2014 Spain LIGA BBVA 1045
## 13 2014/2015 England Premier League 975
## 14 2014/2015 Spain LIGA BBVA 1009
## 15 2015/2016 England Premier League 1026
## 16 2015/2016 Spain LIGA BBVA 1043
# Grouped bar chart of the goal totals: one bar per league within each season,
# drawn horizontally.
ggplot(goalsPerSeason, aes(x = season, y = Goals, fill = League)) +
  geom_col(position = "dodge") +
  labs(title = "Number of Goals throughout each Seasons in each League") +
  coord_flip() +
  scale_fill_viridis(discrete = TRUE) +
  theme(legend.position = "bottom")

# Total number of red cards (home plus away) shown per season in each league.
redPerSeason <- matchesF %>%
  group_by(season, League) %>%
  summarise(Reds = sum(red_card_home_team + red_card_away_team, na.rm = TRUE))
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
# Print the red card totals per season and league.
redPerSeason
## # A tibble: 16 × 3
## # Groups: season [8]
## season League Reds
## <chr> <chr> <int>
## 1 2008/2009 England Premier League 36
## 2 2008/2009 Spain LIGA BBVA 73
## 3 2009/2010 England Premier League 27
## 4 2009/2010 Spain LIGA BBVA 77
## 5 2010/2011 England Premier League 42
## 6 2010/2011 Spain LIGA BBVA 58
## 7 2011/2012 England Premier League 39
## 8 2011/2012 Spain LIGA BBVA 58
## 9 2012/2013 England Premier League 37
## 10 2012/2013 Spain LIGA BBVA 52
## 11 2013/2014 England Premier League 38
## 12 2013/2014 Spain LIGA BBVA 45
## 13 2014/2015 England Premier League 35
## 14 2014/2015 Spain LIGA BBVA 45
## 15 2015/2016 England Premier League 34
## 16 2015/2016 Spain LIGA BBVA 36
# Grouped bar chart of the red card totals: one bar per league within each
# season, drawn horizontally.
ggplot(redPerSeason, aes(x = season, y = Reds, fill = League)) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of Red Cards throughout each Seasons",
    x = "Seasons",
    y = "Number of Red Cards"
  ) +
  coord_flip() +
  scale_fill_viridis(discrete = TRUE) +
  theme(legend.position = "bottom")

# Build the league table and the match-by-match running statistics for one
# season of one league. Reads the global data frame `matchesF`.
#
# Arguments:
#   a  season label as stored in matchesF$season (e.g. "2010/2011").
#   b  league name as stored in matchesF$League (e.g. "Spain LIGA BBVA").
#
# Returns a list with two data frames:
#   SeasonTable   one row per team with Rank, Teams, Played, GF (goals for),
#                 GA (goals against), GD (goal difference) and Points, sorted
#                 by points in decreasing order. Teams level on points are not
#                 separated by goal difference; their relative order is
#                 whatever order() yields for ties.
#   SeasonResult  the selected rows of matchesF plus, for every match, the
#                 points, table position, goals for and goals against of the
#                 home and the away team after that match (HomePoints,
#                 AwayPoints, HomePosition, AwayPosition, HomeGoalsFor,
#                 AwayGoalsFor, HomeGoalsAgainst, AwayGoalsAgainst).
#
# Stops with an error when no match fits the requested season and league.
# `Played` is counted from the data (38 for a complete 20-team season).
seasonalData <- function(a, b) {
  # which() drops NA comparisons instead of producing all-NA rows.
  season <- matchesF[which(matchesF$season == a & matchesF$League == b), ]
  n_matches <- nrow(season)
  if (n_matches == 0) {
    stop(
      "No matches found for season '", a, "' in league '", b, "'.",
      call. = FALSE
    )
  }

  # Home teams first (in order of appearance), then any team that only ever
  # appears as an away team.
  teams <- unique(as.character(c(season$HomeTeam, season$AwayTeam)))

  # Running per-team totals, kept as named vectors indexed by team name.
  tally <- stats::setNames(numeric(length(teams)), teams)
  points <- tally
  goals_for <- tally
  goals_against <- tally
  played <- tally
  ranks <- tally

  # Per-match snapshots, preallocated and copied into `season` after the loop.
  home_points <- numeric(n_matches)
  away_points <- numeric(n_matches)
  home_position <- numeric(n_matches)
  away_position <- numeric(n_matches)
  home_goals_for <- numeric(n_matches)
  away_goals_for <- numeric(n_matches)
  home_goals_against <- numeric(n_matches)
  away_goals_against <- numeric(n_matches)

  for (i in seq_len(n_matches)) {
    home <- as.character(season$HomeTeam[i])
    away <- as.character(season$AwayTeam[i])
    home_goals <- season$home_team_goal[i]
    away_goals <- season$away_team_goal[i]

    # A win is worth 3 points, a draw 1 point for each side.
    if (home_goals > away_goals) {
      points[home] <- points[home] + 3
    } else if (home_goals == away_goals) {
      points[home] <- points[home] + 1
      points[away] <- points[away] + 1
    } else {
      points[away] <- points[away] + 3
    }

    goals_for[home] <- goals_for[home] + home_goals
    goals_for[away] <- goals_for[away] + away_goals
    goals_against[home] <- goals_against[home] + away_goals
    goals_against[away] <- goals_against[away] + home_goals
    played[home] <- played[home] + 1
    played[away] <- played[away] + 1

    # Table positions after this match, ranked on points only.
    ranks[order(points, decreasing = TRUE)] <- seq_along(teams)

    home_points[i] <- points[home]
    away_points[i] <- points[away]
    home_position[i] <- ranks[home]
    away_position[i] <- ranks[away]
    home_goals_for[i] <- goals_for[home]
    away_goals_for[i] <- goals_for[away]
    home_goals_against[i] <- goals_against[home]
    away_goals_against[i] <- goals_against[away]
  }

  season$HomePoints <- home_points
  season$AwayPoints <- away_points
  season$HomePosition <- home_position
  season$AwayPosition <- away_position
  season$HomeGoalsFor <- home_goals_for
  season$AwayGoalsFor <- away_goals_for
  season$HomeGoalsAgainst <- home_goals_against
  season$AwayGoalsAgainst <- away_goals_against

  standings <- data.frame(
    Rank = unname(ranks),
    Teams = teams,
    Played = unname(played),
    GF = unname(goals_for),
    GA = unname(goals_against),
    GD = unname(goals_for - goals_against),
    Points = unname(points),
    stringsAsFactors = FALSE
  )
  standings <- standings[order(standings$Points, decreasing = TRUE), ]

  # Both returned data frames get plain 1..n row names.
  rownames(standings) <- NULL
  rownames(season) <- NULL
  list(SeasonTable = standings, SeasonResult = season)
}
# League table for LaLiga in the 2010/2011 season. seasonalData() returns two
# tables, so the one we want is picked by name.
SeasonTable <- seasonalData("2010/2011", "Spain LIGA BBVA")[["SeasonTable"]]
SeasonTable
## Rank Teams Played GF GA GD Points
## 1 1 FC Barcelona 38 95 21 74 96
## 2 2 Real Madrid CF 38 102 33 69 92
## 3 3 Valencia CF 38 64 44 20 71
## 4 4 Villarreal CF 38 54 44 10 62
## 5 5 Atlético Madrid 38 62 53 9 58
## 6 6 Athletic Club de Bilbao 38 59 55 4 58
## 7 7 Sevilla FC 38 62 61 1 58
## 8 8 RCD Espanyol 38 46 55 -9 49
## 9 9 CA Osasuna 38 45 46 -1 47
## 10 10 Real Sporting de Gijón 38 35 42 -7 47
## 11 11 Málaga CF 38 54 68 -14 46
## 12 12 Racing Santander 38 41 56 -15 46
## 13 13 Levante UD 38 41 52 -11 45
## 14 14 Real Sociedad 38 49 66 -17 45
## 15 15 Real Zaragoza 38 40 53 -13 45
## 16 16 RCD Mallorca 38 41 56 -15 44
## 17 17 Getafe CF 38 49 60 -11 44
## 18 18 RC Deportivo de La Coruña 38 31 47 -16 43
## 19 19 Hércules Club de Fútbol 38 36 60 -24 35
## 20 20 UD Almería 38 36 70 -34 30
# Match-by-match information for all LaLiga games of the 2010/2011 season,
# i.e. the second table returned by seasonalData().
SeasonResult <- seasonalData("2010/2011", "Spain LIGA BBVA")[["SeasonResult"]]
# Bar chart of how many matches ended with each full-time result, with the
# most frequent result first.
ggplot(
  count(SeasonResult, FullTimeResult),
  aes(x = fct_reorder(FullTimeResult, -n), y = n)
) +
  geom_col(fill = "#3D195B", color = "black") +
  labs(
    title = "Match Result for the Season",
    x = "Result",
    y = "Number of Wins"
  ) +
  theme_bw()

# Number of games each referee officiated in the selected season and league.
refCount <- SeasonResult %>%
  group_by(Ref) %>%
  summarise(Games = n())
# Number of red cards (home plus away) each referee showed, with the number of
# games joined on.
refRed <- SeasonResult %>%
  group_by(Ref) %>%
  summarise(Red = sum(red_card_home_team + red_card_away_team, na.rm = TRUE)) %>%
  left_join(refCount, by = "Ref")
# Number of yellow cards (home plus away) each referee showed.
refYellow <- SeasonResult %>%
  group_by(Ref) %>%
  summarise(
    Yellow = sum(yellow_card_home_team + yellow_card_away_team, na.rm = TRUE)
  )
# One table per referee with red cards, games and yellow cards.
refRedYellow <- left_join(refRed, refYellow, by = "Ref")
refRedYellow
## # A tibble: 19 × 4
## Ref Red Games Yellow
## <chr> <int> <int> <int>
## 1 Álvarez Izquierdo 5 21 131
## 2 Ayza Gámez 4 19 84
## 3 Clos Gómez 3 20 115
## 4 Delgado Ferreiro 1 18 85
## 5 Estrada Fernández 2 18 100
## 6 Fernández Borbalán 6 18 105
## 7 González González 2 19 99
## 8 Iglesias Villanueva 1 19 101
## 9 Iturralde González 4 17 98
## 10 Mateu Lahoz 3 20 69
## 11 Muñiz Fernández 6 24 154
## 12 Paradas Romero 2 17 87
## 13 Pérez Lasa 3 18 94
## 14 Ramírez Domínguez 3 20 111
## 15 Rubinos Pérez 5 15 67
## 16 Teixeira Vitienes 4 38 170
## 17 Turienzo Álvarez 1 22 125
## 18 Undiano Mallenco 0 21 120
## 19 Velasco Carballo 3 16 98
# Horizontal bar chart of the games each referee officiated, ordered by number
# of games; the fill shade encodes the number of red cards.
refRedYellow %>%
  ggplot(aes(x = reorder(Ref, Games), y = Games, fill = Red)) +
  geom_col(color = "black") +
  scale_fill_gradient(low = "white", high = "darkred") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Games the referees officiated and number of Red Cards",
    x = "Referee",
    y = "Number of Games Officiated"
  ) +
  scale_y_continuous(breaks = seq(0, 38, 2)) +
  theme(legend.position = "bottom")

# Horizontal bar chart of the games each referee officiated, ordered by number
# of games; the fill shade encodes the number of yellow cards.
refRedYellow %>%
  ggplot(aes(x = reorder(Ref, Games), y = Games, fill = Yellow)) +
  geom_col(color = "black") +
  scale_fill_gradient(low = "white", high = "yellow") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Games the referees officiated and number of Yellow Cards",
    x = "Referee",
    y = "Number of Games Officiated"
  ) +
  scale_y_continuous(breaks = seq(0, 38, 2)) +
  theme(legend.position = "bottom")

# Box plot of the possession each team had in its home games of the season,
# one box per home team, ordered by possession and drawn horizontally.
# The earlier theme(axis.text.x = ...) call was removed: theme_bw() is a
# complete theme and replaced that setting, so it never had any effect.
homePossession <- SeasonResult %>%
  ggplot() +
  geom_boxplot(
    aes(
      x = reorder(HomeTeam, possession_home_team),
      y = possession_home_team,
      fill = HomeTeam
    )
  ) +
  labs(
    title = "Relationship between Home Teams and their Possession",
    x = "Home Teams",
    y = "Possession"
  ) +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")
# Interactive version of the plot.
ggplotly(homePossession)
# Box plot of the possession each team had in its away games of the season,
# one box per away team, ordered by possession and drawn horizontally.
# The earlier theme(axis.text.x = ...) call was removed: theme_bw() is a
# complete theme and replaced that setting, so it never had any effect.
awayPossession <- SeasonResult %>%
  ggplot() +
  geom_boxplot(
    aes(
      x = reorder(AwayTeam, possession_away_team),
      y = possession_away_team,
      fill = AwayTeam
    )
  ) +
  labs(
    title = "Relationship between Away Teams and their Possession",
    x = "Away Teams",
    y = "Possession"
  ) +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")
# Interactive version of the plot.
ggplotly(awayPossession)
# Tree map of the season table with one tile per team, sized by the team's
# points.
treemap(
  SeasonTable,
  index = "Teams",
  vSize = "Points",
  vColor = "GF",
  palette = "Set3",
  fontsize.labels = 8,
  fontcolor.labels = "black",
  border.col = "white"
)

# Goals each team scored in its home games and in its away games. The two
# summaries are joined on the team name so every team gets one row with both
# counts.
seasonHomeGoals <- SeasonResult %>%
  group_by(HomeTeam) %>%
  summarise(GoalsHome = sum(home_team_goal, na.rm = TRUE))
seasonAwayGoals <- SeasonResult %>%
  group_by(AwayTeam) %>%
  summarise(GoalsAway = sum(away_team_goal, na.rm = TRUE))
seasonGoals <- seasonHomeGoals %>%
  left_join(seasonAwayGoals, by = c("HomeTeam" = "AwayTeam")) %>%
  rename(Team = HomeTeam)
seasonGoals
## # A tibble: 20 × 3
## Team GoalsHome GoalsAway
## <chr> <int> <int>
## 1 Athletic Club de Bilbao 32 27
## 2 Atlético Madrid 35 27
## 3 CA Osasuna 28 17
## 4 FC Barcelona 46 49
## 5 Getafe CF 33 16
## 6 Hércules Club de Fútbol 27 9
## 7 Levante UD 25 16
## 8 Málaga CF 29 25
## 9 Racing Santander 25 16
## 10 RC Deportivo de La Coruña 22 9
## 11 RCD Espanyol 33 13
## 12 RCD Mallorca 25 16
## 13 Real Madrid CF 61 41
## 14 Real Sociedad 27 22
## 15 Real Sporting de Gijón 23 12
## 16 Real Zaragoza 26 14
## 17 Sevilla FC 35 27
## 18 UD Almería 23 13
## 19 Valencia CF 34 30
## 20 Villarreal CF 33 21
# seasonGoals is in wide form; a stacked bar chart needs one row per team and
# goal type, so the two goal columns are pivoted into a longer format.
seasonGoalsL <- pivot_longer(
  seasonGoals,
  cols = GoalsHome:GoalsAway,
  names_to = "Goals",
  values_to = "Count"
)
seasonGoalsL
## # A tibble: 40 × 3
## Team Goals Count
## <chr> <chr> <int>
## 1 Athletic Club de Bilbao GoalsHome 32
## 2 Athletic Club de Bilbao GoalsAway 27
## 3 Atlético Madrid GoalsHome 35
## 4 Atlético Madrid GoalsAway 27
## 5 CA Osasuna GoalsHome 28
## 6 CA Osasuna GoalsAway 17
## 7 FC Barcelona GoalsHome 46
## 8 FC Barcelona GoalsAway 49
## 9 Getafe CF GoalsHome 33
## 10 Getafe CF GoalsAway 16
## # … with 30 more rows
# Horizontal stacked bar chart of seasonGoalsL: one bar per team, split into
# home goals and away goals.
seasonGoalsL %>%
  ggplot(aes(x = reorder(Team, Count), y = Count, fill = Goals)) +
  geom_col(position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  coord_flip() +
  theme_bw() +
  labs(
    title = "Home Goals and Away Goals for Teams in the League for given Season",
    x = "Teams",
    y = "Number of Goals"
  ) +
  scale_y_continuous(breaks = seq(0, 150, 10)) +
  theme(legend.position = "bottom", text = element_text(size = 10))

# Build a match-by-match statistics table for the requested teams.
#
# Arguments:
#   names   Team names to report on (character vector). Any team that appears
#           in the HomeTeam or AwayTeam column of `results` can be passed.
#   results Match table to read from. Defaults to the global SeasonResult, so
#           existing calls such as stagePointsGoals(c("FC Barcelona")) behave
#           as before.
#
# Returns a data frame with one row per (team, match), teams in the order
# given and matches in the order they appear in `results`, with columns:
#   Team               the requested team name
#   Points             HomePoints or AwayPoints, depending on the side played
#   Stage              the match's `stage`
#   TotalGoalsFor      HomeGoalsFor or AwayGoalsFor
#   TotalGoalsAgainst  HomeGoalsAgainst or AwayGoalsAgainst
#   Shots              on-target plus off-target shots for the team's side
#   Goal               home_team_goal or away_team_goal
# Teams with no matches contribute no rows; if nothing matches at all, a
# zero-row data frame with the same seven columns is returned.
stagePointsGoals <- function(names, results = SeasonResult) {
  required <- c(
    "HomeTeam", "AwayTeam", "stage",
    "HomePoints", "AwayPoints",
    "HomeGoalsFor", "AwayGoalsFor",
    "HomeGoalsAgainst", "AwayGoalsAgainst",
    "on_target_shot_home_team", "off_target_shot_home_team",
    "on_target_shot_away_team", "off_target_shot_away_team",
    "home_team_goal", "away_team_goal"
  )
  absent <- setdiff(required, colnames(results))
  if (length(absent) > 0) {
    stop(
      "results is missing column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # All rows for a single team, built column-wise instead of cell by cell.
  teamRows <- function(name) {
    # %in% never yields NA, so rows with a missing team name are skipped
    # rather than breaking the comparison.
    playedHome <- results$HomeTeam %in% name
    playedAway <- results$AwayTeam %in% name
    games <- results[playedHome | playedAway, , drop = FALSE]
    atHome <- games$HomeTeam %in% name

    # Take the home column where the team hosted, the away column otherwise.
    side <- function(homeCol, awayCol) {
      values <- games[[awayCol]]
      values[atHome] <- games[[homeCol]][atHome]
      values
    }

    data.frame(
      Team = rep(name, nrow(games)),
      Points = side("HomePoints", "AwayPoints"),
      Stage = games[["stage"]],
      TotalGoalsFor = side("HomeGoalsFor", "AwayGoalsFor"),
      TotalGoalsAgainst = side("HomeGoalsAgainst", "AwayGoalsAgainst"),
      Shots = side("on_target_shot_home_team", "on_target_shot_away_team") +
        side("off_target_shot_home_team", "off_target_shot_away_team"),
      Goal = side("home_team_goal", "away_team_goal"),
      stringsAsFactors = FALSE
    )
  }

  perTeam <- lapply(names, teamRows)
  perTeam <- Filter(function(rows) nrow(rows) > 0, perTeam)
  if (length(perTeam) == 0) {
    # No team matched (or no names given): return an empty, correctly named
    # table instead of failing.
    return(teamRows(character(0)))
  }

  combined <- do.call(rbind, perTeam)
  rownames(combined) <- NULL
  combined
}
# Match-by-match statistics, obtained from stagePointsGoals, for the teams
# passed as its argument.
selected <- stagePointsGoals(
  c(
    "FC Barcelona",
    "Real Madrid CF",
    "Sevilla FC",
    "RCD Espanyol",
    "Real Sporting de Gijón"
  )
)
# Using the selected table, create a line chart of points per stage, with one
# line per team.
pointLines <- selected %>%
  ggplot() +
  # The summary function is given explicitly; without it stat_summary() falls
  # back to mean_se() and prints a message on every render. The line only
  # uses the mean, so the drawn values are unchanged.
  stat_summary(
    aes(x = Stage, y = Points, group = Team, color = Team),
    fun = mean,
    geom = "line",
    linewidth = 0.5
  ) +
  labs(
    title = "Total Points per Stage throughout the season",
    x = "Stage",
    y = "Points"
  ) +
  theme_light() +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  scale_x_continuous(breaks = seq(0, 38, 2)) +
  theme(legend.position = "bottom")
# Convert the static chart into an interactive plotly widget.
ggplotly(pointLines)
## No summary function supplied, defaulting to `mean_se()`
# Using the selected table, create a line chart of goals scored per stage,
# with one line per team.
goalLines <- selected %>%
  ggplot() +
  # The summary function is given explicitly; without it stat_summary() falls
  # back to mean_se() and prints a message on every render. The line only
  # uses the mean, so the drawn values are unchanged.
  stat_summary(
    aes(x = Stage, y = TotalGoalsFor, group = Team, color = Team),
    fun = mean,
    geom = "line",
    linewidth = 0.5
  ) +
  labs(
    title = "Total Goals per Stage throughout the season",
    x = "Stage",
    y = "Goals"
  ) +
  theme_light() +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  scale_x_continuous(breaks = seq(0, 38, 2)) +
  theme(legend.position = "bottom")
# Convert the static chart into an interactive plotly widget.
ggplotly(goalLines)
## No summary function supplied, defaulting to `mean_se()`
# Stacked area chart of TotalGoalsFor per stage: each band is one team, and
# the height of the stack is the selected teams' combined value at that stage.
ggplot(selected, aes(x = Stage, y = TotalGoalsFor, fill = Team)) +
  # `linewidth` replaces `size` for line widths, which is deprecated since
  # ggplot2 3.4.0.
  geom_area(alpha = 0.8, linewidth = 0.5, colour = "white") +
  labs(
    title = "Total Goals Scored per Stage throughout the season",
    x = "Stage",
    y = "Total Goals For"
  ) +
  scale_fill_viridis(discrete = TRUE) +
  # theme_bw() is a complete theme, so the theme_ipsum() that used to precede
  # it was fully overridden and has been dropped.
  theme_bw() +
  scale_y_continuous(breaks = seq(0, 500, 40)) +
  scale_x_continuous(breaks = seq(0, 38, 2)) +
  theme(legend.position = "bottom")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

# Stacked area chart of TotalGoalsAgainst per stage: each band is one team,
# and the height of the stack is the selected teams' combined value at that
# stage.
ggplot(selected, aes(x = Stage, y = TotalGoalsAgainst, fill = Team)) +
  # `linewidth` replaces `size` for line widths, which is deprecated since
  # ggplot2 3.4.0.
  geom_area(alpha = 0.8, linewidth = 0.5, colour = "white") +
  labs(
    title = "Total Goals Conceded per Stage throughout the season",
    x = "Stage",
    y = "Total Goals Against"
  ) +
  scale_fill_viridis(discrete = TRUE) +
  # theme_bw() is a complete theme, so the theme_ipsum() that used to precede
  # it was fully overridden and has been dropped.
  theme_bw() +
  scale_y_continuous(breaks = seq(0, 500, 40)) +
  scale_x_continuous(breaks = seq(0, 38, 2)) +
  theme(legend.position = "bottom")
